This notebook takes as input a time-series CSV file generated by Google Earth Engine, containing cloud-filtered Landsat Surface Reflectance NDVI from Landsats 5, 7, and 8 over a geometry such as a point or polygon. The getLogs function counts the days between the first and last observations above a threshold value, for each year of data. Because the count is made separately within each calendar year, a growing season that spans the new year is split in two; thus for winter growing seasons this will not be adequate.
Data can be sparse for some years due to cloudiness or missing Landsat data.
In [1]:
import pandas as pd, numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
%matplotlib inline
The input CSV is exported from Earth Engine. For example, this script generates a time series from Landsat 5, 7, and 8 Surface Reflectance products on plot GBRO1 (pasture) or GBRO2 (cropland) in North Dakota.
https://code.earthengine.google.com/070a15b53111ad808b01e0ace1de433b
plot locations:
In [2]:
#function to count days above threshold NDVI from time series data
def getLogs(self, th): #args: dataframe, threshold NDVI value
    """Count the days on which NDVI stayed at or above a threshold.

    Consecutive observations with ``nd >= th`` form a run.  A run with two
    or more observations contributes the number of days between its first
    and its last observation; a run made of a single observation is
    credited a flat 16 days (presumably the Landsat revisit interval).

    Args:
        self: DataFrame with an ``nd`` column (NDVI values) and a datetime
            ``date`` column.  Rows are assumed to already be in
            chronological order -- they are not sorted here.  The frame is
            only read, never modified, so it is safe to pass a group from
            ``groupby(...).apply``.
        th: NDVI threshold; a value equal to it counts as above.

    Returns:
        Total number of days as a plain ``int``; 0 when no observation
        reaches the threshold (this includes an empty frame).
    """
    nd = self['nd']
    above = nd >= th
    before = nd.shift(1)
    after = nd.shift(-1)

    # A run starts where the previous observation is below the threshold or
    # missing, and ends where the following one is below it or missing.
    starts = above & ((before < th) | before.isnull())
    ends = above & ((after < th) | after.isnull())

    # credit 16 days for single observations above threshold
    lone = starts & ends
    singles = int(lone.sum()) * 16

    # Keep only the first and last row of every multi-observation run.  Plain
    # boolean arrays select by position, so index labels play no part.
    edges = ((starts | ends) & ~lone).values
    if not edges.any():
        return singles

    # With everything else filtered out, each run start is immediately
    # followed by the end of the same run, so the gap to the next remaining
    # row is the length of that run in days.
    edge_dates = self['date'][edges]
    span_days = (edge_dates.shift(-1) - edge_dates).dt.days
    return int(span_days[starts.values[edges]].sum() + singles)
In [3]:
# Inputs for this run: the NDVI time-series CSV, the NDVI value that counts
# as "green", and the site label that goes into the plot title.
df = pd.read_csv("/Users/Peter/Downloads/gbro2.csv")
threshold = 0.25
location = "Brown Ranch crop field, GBRO2"
In [4]:
# Build a datetime column from the exported timestamps, index the frame by
# it, and discard the raw timestamp column.
df['date'] = pd.to_datetime(df['system:time_start'])
df = df.set_index(df['date']).drop('system:time_start', axis=1)

# Drop the rows without observations (masked), then chop off 2018, which
# Earth Engine didn't do.
df = df.dropna()
df = df.loc[df['date'].dt.year < 2018]

# Per calendar year: days above the threshold (getLogs) and the number of
# observations available.
logs = df.groupby(df['date'].dt.year).apply(getLogs, th=threshold)
count = df.groupby(df['date'].dt.year)[['nd']].count()

# Combine both into one yearly table (this replaces the input frame) and add
# a 5-year rolling mean of the day count.
df = logs.to_frame().join(count)
df.columns = ['days of green', 'number of observations']
df['rolling mean'] = df['days of green'].rolling(window=5).mean()
df
Out[4]:
In [5]:
# Line plot of every column of the yearly table; the title records the
# threshold and the site.
df.plot(
    title=''.join(['Days of Landsat NDVI above ', str(threshold), ' at ', location]),
    lw=5,
    grid=True,
    figsize=(15, 10),
)
Out[5]:
In [6]:
# Inputs for this run: the NDVI time-series CSV, the NDVI value that counts
# as "green", and the site label that goes into the plot title.
df = pd.read_csv("/Users/Peter/Downloads/gbro1.csv")
threshold = 0.25
location = "Brown Ranch pasture, GBRO1"
In [7]:
# Build a datetime column from the exported timestamps, index the frame by
# it, and discard the raw timestamp column.
df['date'] = pd.to_datetime(df['system:time_start'])
df = df.set_index(df['date']).drop('system:time_start', axis=1)

# Drop the rows without observations (masked), then chop off 2018, which
# Earth Engine didn't do.
df = df.dropna()
df = df.loc[df['date'].dt.year < 2018]

# Per calendar year: days above the threshold (getLogs) and the number of
# observations available.
logs = df.groupby(df['date'].dt.year).apply(getLogs, th=threshold)
count = df.groupby(df['date'].dt.year)[['nd']].count()

# Combine both into one yearly table (this replaces the input frame) and add
# a 5-year rolling mean of the day count.
df = logs.to_frame().join(count)
df.columns = ['days of green', 'number of observations']
df['rolling mean'] = df['days of green'].rolling(window=5).mean()
df
Out[7]:
In [8]:
# Line plot of every column of the yearly table; the title records the
# threshold and the site.
df.plot(
    title=''.join(['Days of Landsat NDVI above ', str(threshold), ' at ', location]),
    lw=5,
    grid=True,
    figsize=(15, 10),
)
Out[8]: